#### Fit the OLS model using the Bayesian, single-region approach.

### Prepare environment

setwd("~/research/coffee-dataverse/src")
do.origspec <- T

library(dplyr)
library(lfe)
library(splines)
library(MASS)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

source("load.R", encoding="iso-8859-1")

output.suffix <- paste0("-ols", ifelse(do.origspec, "-orig", "-minx"))

### Core Bayesian model

stan.model.ols <- "
data {
  int<lower=0> N; // observations
  int<lower=0> T; // periods
  int<lower=0> M; // regions
  int<lower=0> K; // weather data values for yield

  int<lower=1, upper=M> region[N];
  int year2000[N];

  // predictors
  matrix[N, K] weather_yield;

  // outcomes
  vector[N] production;
  vector[N] harvest;
}
transformed data {
  vector[N] logyield = log(production ./ harvest);
}
parameters {
  real<upper=3> yield_fe[M]; // best yield < 20

  real scale_trend;
  vector[K] yield_coeff;
  real<lower=0> yield_sigma[M];
}
transformed parameters {
  vector[N] logyield_meanpart;

  logyield_meanpart = weather_yield * yield_coeff;
}
model {
  // Distributional models
  for (ii in 1:N) {
    logyield[ii] ~ normal(yield_fe[region[ii]] + logyield_meanpart + scale_trend * year2000[ii], yield_sigma[region[ii]]);
  }
}"

### Prepare data

## Calculate bearing areas
df$bearing.guess <- NA
for (region in unique(df$Munic�pio)) {
    rows <- which(df$Munic�pio == region)
    bearing <- 0
    for (year in min(df$year[rows]):max(df$year[rows])) {
        rr <- rows[which(df$year[rows] == year)]
        bearing <- max(bearing * .85, df$total.harvest[rr])
        df$bearing.guess[rr] <- bearing
    }
}

df$yield.guess <- df$total.produce / df$bearing.guess
df$logyield.guess <- log(df$yield.guess)
df$logyield.guess[!is.finite(df$logyield.guess)] <- NA

olsmod <- felm(as.formula(paste("logyield.guess ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=df)

## Calculate weather-only residuals to get trend prior and FE
##   Determine fe and trend priors across all regions
df$ols.resid <- df$logyield.guess - as.matrix(df[, weatherpreds]) %*% matrix(olsmod$coefficients[1:length(weatherpreds)], length(weatherpreds), 1)
df$year2000 <- df$year - 2000
linmod <- felm(ols.resid ~ year2000 | Munic�pio | 0 | Munic�pio, df)
linfes <- getfe(linmod, se=T, cluster=T, robust=T)

### Calibrate on each region

if (file.exists(paste0("../data/sumstats", output.suffix, ".csv"))) {
    completed <- unique(read.csv(paste0("../data/sumstats", output.suffix, ".csv"))$region)
} else {
    completed <- c()
}
length(completed)

count <- 0
fit <- NA
for (region in unique(df$Munic�pio)) {
    count <- count + 1
    if (region %in% completed)
        next

    completed <- c(completed, region)

    subdf <- df[df$Munic�pio == region & !is.na(df$edd1000),]
    if (nrow(subdf) < 4)
        next

    myfes <- sapply(levels(factor(subdf$Munic�pio)), function(region) linfes$effect[linfes$idx == region])
    myfes.se <- sapply(levels(factor(subdf$Munic�pio)), function(region) linfes$robustse[linfes$idx == region])

    if (length(myfes) == 1) {
        myfes <- array(myfes, 1)
        myfes.se <- array(myfes.se, 1)
    }

    stan.data <- list(N=nrow(subdf), T = nrow(subdf), K = length(weatherpreds), M = 1,
                      region=rep(1, nrow(subdf)), year2000=subdf$year - 2000,
                      weather_yield = subdf[, weatherpreds],
                      production = subdf$total.produce, harvest = subdf$total.harvest)

    init <- function() {
        yield_fe <- rnorm(length(unique(subdf$Munic�pio)), myfes, myfes.se)

        if (length(unique(subdf$Munic�pio)) == 1) {
            yield_fe <- array(yield_fe, 1)
        }
        list(yield_fe=yield_fe,
             scale_trend=rnorm(1, linmod$coefficients[1], linmod$cse[1]),
             yield_coeff=as.numeric(mvrnorm(1, olsmod$coefficients[1:stan.data$K], olsmod$vcv[1:stan.data$K, 1:stan.data$K])))
    }

    if (is.na(fit)) {
        fit <- stan(model_code = stan.model.ols, data = stan.data,
                    init=init, iter = 2000, chains = 8, open_progress=F)
    } else {
        fit <- stan(fit=fit, data = stan.data,
                    init=init, iter = 2000, chains = 8, open_progress=F)
    }

    print(fit)

    la <- extract(fit, permute=T)
    if (is.null(la)) {
        if (file.exists(paste0("../data/sumstats", output.suffix, ".csv")))
            write.table(data.frame(param="lp__", region, mean=NA, se_mean=NA, sd=NA, `2.5%`=NA, `25%`=NA, `50%`=NA, `75%`=NA, `97.5%`=NA, neff=NA, Rhat=NA), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=F, col.names=F, append=T)
        else
            write.table(data.frame(param="lp__", region, mean=NA, se_mean=NA, sd=NA, `2.5%`=NA, `25%`=NA, `50%`=NA, `75%`=NA, `97.5%`=NA, neff=NA, Rhat=NA), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=F)
        next
    }

    if (length(weatherpreds) == 6) {
        df.vmc <- data.frame(region, yield_fe=la$yield_fe, scale_trend=la$scale_trend,
                             yield_coeff1=la$yield_coeff[,1],
                             yield_coeff2=la$yield_coeff[,2], yield_coeff3=la$yield_coeff[,3],
                             yield_coeff4=la$yield_coeff[,4], yield_coeff5=la$yield_coeff[,5],
                             yield_coeff6=la$yield_coeff[,6], yield_sigma=la$yield_sigma)
    } else {
        df.vmc <- data.frame(region, yield_fe=la$yield_fe, scale_trend=la$scale_trend,
                             yield_coeff1=la$yield_coeff[,1],
                             yield_coeff2=la$yield_coeff[,2], yield_coeff3=la$yield_coeff[,3],
                             yield_coeff4=la$yield_coeff[,4], yield_coeff5=la$yield_coeff[,5],
                             yield_sigma=la$yield_sigma)
    }

    if (file.exists(paste0("../data/allfits", output.suffix, ".csv")))
        write.table(df.vmc, paste0("../data/allfits", output.suffix, ".csv"), sep=",", row.names=F, col.names=F, append=T)
    else
        write.table(df.vmc, paste0("../data/allfits", output.suffix, ".csv"), sep=",", row.names=F)

    sumstat <- summary(fit)$summary
    if (file.exists(paste0("../data/sumstats", output.suffix, ".csv")))
        write.table(cbind(region, sumstat), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=T, col.names=F, append=T)
    else
        write.table(cbind(region, sumstat), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=T)
}

